林嶔 (Lin, Chin)
Lesson 23
上週的課程已帶大家實作最基本的卷積神經網路-LeNet,這週開始我們要帶著大家了解卷積神經網路後續在圖像辨識上的爆炸性發展。
ImageNet是2007年由史丹福大學教授李飛飛創辦,其收集大量帶有標註信息的圖片數據供電腦視覺模型訓練,而每年將會利用其資料進行ILSVRC圖像識別競賽。
– AlexNet -> VGGNet -> GoogLeNet -> ResNet
使用ReLU做為非線性變換的激活函數
使用Dropout技術
使用overlap的max pooling
數據增強
使用GPU加速深度卷積網絡的訓練
– VGGNet論文中全部使用了3x3的卷積核和2x2的池化核,通過不斷加深網絡結構來提升性能。
他的核心概念是透過2個3x3的卷積核取代5x5的卷積核,從而減少參數並探討其效益。
這篇研究透過比較上述6個神經網路,告訴了我們幾個鐵則:
越深的網絡效果越好
1x1的卷積核也顯著提升效能
– 除此之外,他引入了Network In Network的概念,在卷積層內使用大量的1x1的卷積核。
– 假設有一個比較淺的網絡達到了飽和的準確率,那麼後面再加上幾個恆等映射(identity mapping)層,起碼誤差不會增加,因此設計出了Residual unit:
– 我們可以下載該模型進行預測
library(mxnet)
library(imager)
library(magrittr)
# Load the pre-trained residual network: the parameter checkpoint stored
# under the prefix "model/resnet-18" and the matching symbol definition.
res_model <- mx.model.load("model/resnet-18", 0)
res_sym <- mx.symbol.load("model/resnet-18-symbol.json")
# Define image processing functions
#
# preproc.image: prepare one image as input for the network. The image is
# made square, resized to 224 x 224, rescaled by 255 and has a mean value
# subtracted.
#
# Args:
#   im:         image data with at least two dimensions (width, height).
#               Non-square input is handed to crop.borders(); square input
#               may be a plain matrix/array and is rescaled so that its
#               largest value becomes 1.
#   mean.image: value subtracted from the rescaled pixels. When NULL, the
#               mean of the rescaled image itself is used.
#
# Returns:
#   The processed pixels with a trailing dimension of length 1 appended
#   (width, height, channel, num).
preproc.image <- function(im, mean.image = NULL) {
  shape <- dim(im)
  if (length(shape) < 2) {
    stop("`im` must have at least two dimensions (width, height)",
         call. = FALSE)
  }
  if (shape[1] != shape[2]) {
    # Trim the longer side symmetrically so that the image becomes square.
    short.edge <- min(shape[1:2])
    xx <- floor((shape[1] - short.edge) / 2)
    yy <- floor((shape[2] - short.edge) / 2)
    cropped <- crop.borders(im, xx, yy)
  } else {
    # Pad missing trailing dimensions with 1 so that there are at least
    # four of them; input that already has four or more is kept as is.
    full.shape <- c(shape, rep(1, max(0, 4 - length(shape))))
    cropped <- array(im, dim = full.shape)
    # Scale so the largest value is 1. A blank (all-zero) image or one with
    # a non-finite maximum is left untouched instead of producing 0/0 = NaN.
    peak <- max(cropped)
    if (is.finite(peak) && peak != 0) {
      cropped <- cropped / peak
    }
  }
  # resize to 224 x 224, needed by input of the model.
  resized <- resize(cropped, 224, 224)
  # convert to array (x, y, channel): drop the third dimension
  arr <- as.array(resized) * 255
  dim(arr) <- dim(arr)[-3]
  # subtract the mean
  if (is.null(mean.image)) {
    mean.image <- mean(arr)
  }
  normed <- arr - mean.image
  # Reshape to format needed by mxnet (width, height, channel, num)
  dim(normed) <- c(dim(normed), 1)
  normed
}
# Read the parrot picture that ships with imager and draw it on a blank,
# margin-free canvas.
img <- load.image(
  system.file("extdata/parrots.png", package = "imager")
)
par(mar = rep(0, 4))
plot(NA, xlim = c(0, 1), ylim = c(0, 1), xaxt = "n", yaxt = "n", bty = "n")
rasterImage(img,
            xleft = -0.04, ybottom = -0.04, xright = 1.04, ytop = 1.04,
            interpolate = FALSE)
# Pre-processing
normed <- preproc.image(img)
# Predict on the CPU and show the position of the largest probability
prob <- predict(res_model, X = normed, ctx = mx.cpu())
which.max(prob)
## [1] 89
– 編號與Label對照表請訪問這裡
# Get features
# Pick two internal outputs of the network by name and group them into one
# symbol so that a single forward pass yields both.
all_layers <- res_sym$get.internals()
flatten0_output <- all_layers$get.output(
  which(all_layers$outputs == "flatten0_output")
)
fc1_output <- all_layers$get.output(
  which(all_layers$outputs == "fc1_output")
)
out <- mx.symbol.Group(c(flatten0_output, fc1_output))
# Bind for a single 224 x 224 x 3 input, then copy in the loaded parameters
# and the pre-processed image before running the forward pass.
executor <- mx.simple.bind(symbol = out, data = c(224, 224, 3, 1),
                           ctx = mx.cpu())
mx.exec.update.arg.arrays(executor, res_model$arg.params, match.name = TRUE)
mx.exec.update.aux.arrays(executor, res_model$aux.params, match.name = TRUE)
mx.exec.update.arg.arrays(executor, list(data = mx.nd.array(normed)),
                          match.name = TRUE)
mx.exec.forward(executor, is.train = FALSE)
feature <- as.array(executor$ref.outputs$flatten0_output)
dim(feature)
## [1] 512 1
# verification-1: recompute the fc1 output by hand from the extracted
# features and the stored fc1 weight/bias, then compare it side by side
# with the fc1 output reported by the executor.
FC_COEF <- as.array(res_model$arg.params$fc1_weight)
FC_BIAS <- as.array(res_model$arg.params$fc1_bias)
VERIFICATION <- t(feature) %*% FC_COEF + FC_BIAS
FC1_OUTPUT <- t(as.matrix(as.array(executor$ref.outputs$fc1_output)))
head(t(rbind(FC1_OUTPUT, VERIFICATION)))
## [,1] [,2]
## [1,] -2.762232 -2.762232
## [2,] 5.176768 5.176767
## [3,] -1.369514 -1.369514
## [4,] -4.658741 -4.658742
## [5,] -2.485924 -2.485924
## [6,] -3.269172 -3.269171
# verification-2: apply softmax to the hand-computed values and compare the
# result with the probabilities returned by predict().
# The largest value is subtracted before exponentiating; this leaves the
# softmax unchanged mathematically but keeps exp() from overflowing.
shifted <- VERIFICATION - max(VERIFICATION)
new.prob <- exp(shifted) / sum(exp(shifted))
head(cbind(prob, t(new.prob)))
## [,1] [,2]
## [1,] 6.529687e-09 6.529699e-09
## [2,] 1.831285e-05 1.831288e-05
## [3,] 2.628705e-08 2.628711e-08
## [4,] 9.800515e-10 9.800529e-10
## [5,] 8.607785e-09 8.607802e-09
## [6,] 3.933069e-09 3.933076e-09
– 首先要先做檔案的前處理及特徵萃取
library(mxnet)
library(imager)
library(magrittr)
# Define image processing functions
#
# preproc.image: prepare one image as input for the network. The image is
# made square, resized to 224 x 224, rescaled by 255 and has a mean value
# subtracted.
#
# Args:
#   im:         image data with at least two dimensions (width, height).
#               Non-square input is handed to crop.borders(); square input
#               may be a plain matrix/array and is rescaled so that its
#               largest value becomes 1.
#   mean.image: value subtracted from the rescaled pixels. When NULL, the
#               mean of the rescaled image itself is used.
#
# Returns:
#   The processed pixels with a trailing dimension of length 1 appended
#   (width, height, channel, num).
preproc.image <- function(im, mean.image = NULL) {
  shape <- dim(im)
  if (length(shape) < 2) {
    stop("`im` must have at least two dimensions (width, height)",
         call. = FALSE)
  }
  if (shape[1] != shape[2]) {
    # Trim the longer side symmetrically so that the image becomes square.
    short.edge <- min(shape[1:2])
    xx <- floor((shape[1] - short.edge) / 2)
    yy <- floor((shape[2] - short.edge) / 2)
    cropped <- crop.borders(im, xx, yy)
  } else {
    # Pad missing trailing dimensions with 1 so that there are at least
    # four of them; input that already has four or more is kept as is.
    full.shape <- c(shape, rep(1, max(0, 4 - length(shape))))
    cropped <- array(im, dim = full.shape)
    # Scale so the largest value is 1. A blank (all-zero) image or one with
    # a non-finite maximum is left untouched instead of producing 0/0 = NaN.
    peak <- max(cropped)
    if (is.finite(peak) && peak != 0) {
      cropped <- cropped / peak
    }
  }
  # resize to 224 x 224, needed by input of the model.
  resized <- resize(cropped, 224, 224)
  # convert to array (x, y, channel): drop the third dimension
  arr <- as.array(resized) * 255
  dim(arr) <- dim(arr)[-3]
  # subtract the mean
  if (is.null(mean.image)) {
    mean.image <- mean(arr)
  }
  normed <- arr - mean.image
  # Reshape to format needed by mxnet (width, height, channel, num)
  dim(normed) <- c(dim(normed), 1)
  normed
}
# Load model information: parameter checkpoint plus symbol definition
res_model <- mx.model.load("model/resnet-18", 0)
res_sym <- mx.symbol.load("model/resnet-18-symbol.json")
# Read and split data
# Column 1 is used as the label and the remaining columns as pixel values;
# rows 1-300 form the training set and rows 301-500 the test set.
MNIST <- data.matrix(read.csv("data/train.csv", header = TRUE))
train.x <- MNIST[seq_len(300), -1]
train.y <- MNIST[seq_len(300), 1]
test.x <- MNIST[301:500, -1]
test.y <- MNIST[301:500, 1]
# Empty containers for the extracted features: one row per sample,
# 512 columns.
train.x.array <- matrix(NA, nrow = 300, ncol = 512)
test.x.array <- matrix(NA, nrow = 200, ncol = 512)
# Define symbol
# Select the `flatten0_output` and `fc1_output` internals by name and group
# them so that one executor exposes both.
all_layers <- res_sym$get.internals()
flatten0_output <- all_layers$get.output(
  which(all_layers$outputs == "flatten0_output")
)
fc1_output <- all_layers$get.output(
  which(all_layers$outputs == "fc1_output")
)
out <- mx.symbol.Group(c(flatten0_output, fc1_output))
# Bind for a single 224 x 224 x 3 input and copy in the loaded parameters.
executor <- mx.simple.bind(symbol = out, data = c(224, 224, 3, 1),
                           ctx = mx.cpu())
mx.exec.update.arg.arrays(executor, res_model$arg.params, match.name = TRUE)
mx.exec.update.aux.arrays(executor, res_model$aux.params, match.name = TRUE)
# Get Features
#
# extract.flatten0: push one flattened 28 x 28 digit through `executor` and
# return the `flatten0_output` activations as a plain numeric vector.
#
# Args:
#   pixels:   numeric vector holding one digit (reshaped here to 28 x 28 x 1).
#   executor: executor already loaded with the network parameters.
extract.flatten0 <- function(pixels, executor) {
  normed <- preproc.image(array(pixels, dim = c(28, 28, 1)))
  # The executor is bound to a 3-channel input, so the single grey channel
  # is repeated three times.
  normed <- array(normed[, , rep(1, 3), ], dim = c(224, 224, 3, 1))
  mx.exec.update.arg.arrays(executor, list(data = mx.nd.array(normed)),
                            match.name = TRUE)
  mx.exec.forward(executor, is.train = FALSE)
  as.numeric(as.array(executor$ref.outputs$flatten0_output))
}

# Loop bounds follow the actual number of rows instead of repeating the
# split sizes as literals.
for (i in seq_len(nrow(train.x))) {
  train.x.array[i, ] <- extract.flatten0(train.x[i, ], executor)
}
for (i in seq_len(nrow(test.x))) {
  test.x.array[i, ] <- extract.flatten0(test.x[i, ], executor)
}
library(e1071)
# Fit a support vector machine on the extracted features; the labels are
# converted to a factor so that the task is treated as classification.
svm.model <- svm(x = train.x.array, y = factor(train.y))
# Confusion matrix on the training samples (rows: predicted, columns: true)
pred.train.y <- predict(svm.model, train.x.array)
print(table(pred.train.y, train.y))
## train.y
## pred.train.y 0 1 2 3 4 5 6 7 8 9
## 0 41 0 0 0 0 0 0 0 0 0
## 1 0 33 0 0 0 0 0 0 0 0
## 2 0 0 32 0 0 0 0 0 0 0
## 3 0 0 0 30 0 0 0 0 0 0
## 4 0 0 0 0 28 0 0 0 0 0
## 5 0 0 0 0 0 21 0 0 0 0
## 6 0 0 0 0 0 0 30 0 1 0
## 7 0 0 0 0 0 1 0 29 0 0
## 8 0 0 1 0 0 0 0 0 22 0
## 9 0 0 0 0 0 0 0 0 0 31
# Confusion matrix on the held-out test samples
pred.test.y <- predict(svm.model, test.x.array)
print(table(pred.test.y, test.y))
## test.y
## pred.test.y 0 1 2 3 4 5 6 7 8 9
## 0 15 0 0 0 0 0 1 1 1 2
## 1 0 17 0 0 0 0 0 1 0 0
## 2 0 0 24 1 0 7 0 0 0 0
## 3 0 0 2 16 0 8 0 0 1 1
## 4 0 0 0 0 23 0 1 0 0 0
## 5 0 0 1 1 0 5 0 0 0 0
## 6 0 0 1 0 0 0 14 0 2 0
## 7 0 0 1 0 0 0 0 16 0 1
## 8 0 0 1 0 0 0 0 0 14 1
## 9 0 0 0 0 3 0 0 0 0 17
library(randomForest)
# Fit a random forest on the same extracted features.
# NOTE(review): no seed is set before this call, so the fitted forest and
# the tables below presumably vary between runs - confirm whether
# reproducibility matters here.
rf.model <- randomForest(x = train.x.array, y = factor(train.y))
# Confusion matrix on the training samples (rows: predicted, columns: true)
pred.train.y <- predict(rf.model, train.x.array)
print(table(pred.train.y, train.y))
## train.y
## pred.train.y 0 1 2 3 4 5 6 7 8 9
## 0 41 0 0 0 0 0 0 0 0 0
## 1 0 33 0 0 0 0 0 0 0 0
## 2 0 0 33 0 0 0 0 0 0 0
## 3 0 0 0 30 0 0 0 0 0 0
## 4 0 0 0 0 28 0 0 0 0 0
## 5 0 0 0 0 0 22 0 0 0 0
## 6 0 0 0 0 0 0 30 0 0 0
## 7 0 0 0 0 0 0 0 29 0 0
## 8 0 0 0 0 0 0 0 0 23 0
## 9 0 0 0 0 0 0 0 0 0 31
# Confusion matrix on the held-out test samples
pred.test.y <- predict(rf.model, test.x.array)
print(table(pred.test.y, test.y))
## test.y
## pred.test.y 0 1 2 3 4 5 6 7 8 9
## 0 15 0 0 0 0 0 1 0 1 2
## 1 0 17 0 0 0 0 0 1 0 0
## 2 0 0 21 1 0 5 1 0 0 0
## 3 0 0 3 16 0 6 1 0 2 1
## 4 0 0 1 0 22 0 1 0 0 1
## 5 0 0 0 1 0 7 0 0 0 0
## 6 0 0 0 0 2 0 12 0 2 1
## 7 0 0 1 0 0 0 0 16 0 0
## 8 0 0 3 0 0 2 0 0 13 1
## 9 0 0 1 0 2 0 0 1 0 16
– 首先要先做檔案的前處理(因為用CPU運算的原因,這裡僅做30個TRAIN DATA)
# Resized to 224*224
# Read the first 30 rows: column 1 is the label, the rest are pixel values.
MNIST <- read.csv("data/train.csv", header = TRUE)
MNIST <- data.matrix(MNIST)
train.x <- MNIST[1:30, -1]
train.y <- MNIST[1:30, 1]
# One 28 x 28 x 1 image per sample, samples along the last dimension.
train.x.array <- t(train.x)
dim(train.x.array) <- c(28, 28, 1, nrow(train.x))
norm_train.x.array <- array(NA, dim = c(224, 224, 3, nrow(train.x)))
for (i in seq_len(nrow(train.x))) {
  normed <- preproc.image(train.x.array[, , , i])
  # preproc.image() yields a single channel here; repeat it three times
  # explicitly instead of relying on silent recycling in the assignment.
  norm_train.x.array[, , , i] <- normed[, , rep(1, 3), 1]
}
# Define model
# Start from the `flatten0_output` internal of the loaded network and put a
# new head on top of it: batch normalisation, a 10-unit fully connected
# layer and a softmax output.
flatten0_output <- all_layers$get.output(
  which(all_layers$outputs == "flatten0_output")
)
fc1_weight <- mx.symbol.Variable("fc1_weight")
fc1_bias <- mx.symbol.Variable("fc1_bias")
bn1 <- mx.symbol.BatchNorm(data = flatten0_output)
fc1 <- mx.symbol.FullyConnected(data = bn1,
                                weight = fc1_weight,
                                bias = fc1_bias,
                                num_hidden = 10)
softmax <- mx.symbol.SoftmaxOutput(fc1, name = "softmax")
## Start to train model
# Fixed seed, then 30 rounds on the CPU with batches of 10; training
# accuracy is recorded in `logger` through the epoch-end callback.
# NOTE(review): neither arg.params nor aux.params is passed to this call, so
# the weights held in res_model do not appear to be reused - presumably all
# layers start from the default initialiser. Confirm whether fine-tuning
# from the pre-trained weights was intended.
mx.set.seed(0)
logger <- mx.metric.logger$new()
cnn.model <- mx.model.FeedForward.create(
  softmax,
  X = norm_train.x.array,
  y = train.y,
  ctx = mx.cpu(),
  num.round = 30,
  array.batch.size = 10,
  learning.rate = 0.1,
  momentum = 0.9,
  wd = 1e-5,
  eval.metric = mx.metric.accuracy,
  epoch.end.callback = mx.callback.log.train.metric(100, logger)
)
# Predict four of the training samples and draw them in a 2 x 2 grid.
# The sample indices are defined once so that the predicted and the plotted
# digits cannot drift apart.
pred.idx <- 7:10
pred.data <- norm_train.x.array[, , , pred.idx]
prob <- predict(cnn.model, pred.data)
# Position of the largest probability per sample, minus 1 to map positions
# 1..10 to labels 0..9. ties.method = "first" keeps the result
# deterministic (the default breaks ties at random).
pred.label <- max.col(t(prob), ties.method = "first") - 1
print(pred.label)
## [1] 4 7 7 3
# Remember the previous graphics settings and restore them afterwards.
old.par <- par(mar = rep(0, 4), mfcol = c(2, 2))
for (i in pred.idx) {
  plot(NA, xlim = 0:1, ylim = 0:1, xaxt = "n", yaxt = "n", bty = "n")
  # Pixels are divided by 255 for rasterImage(); t() swaps the first two
  # dimensions before drawing.
  rasterImage(t(train.x.array[, , , i] / 255), -0.04, -0.04, 1.04, 1.04,
              interpolate = FALSE)
}
par(old.par)
– 想想牛頓、高斯、愛因斯坦他們有使用這些方法嗎?為什麼他們能精準地預測眾多物理、天文現象?
– 這些機器學習方法僅僅是在我們無法描述X與Y的關係時幫助我們做複雜函數的擬合,使我們能在不清楚他們的關係前精準地預測。